Aim

I want to check if stimulation with 4 different nuclear receptor agonists induces changes in expression of NR target genes. This script contains basic DESeq2 analysis to do that.


Setup

Libraries

# Load dependencies
library(tidyverse)
library(GenomicRanges)
library(rtracklayer)
library(ggplot2)
library(ggbeeswarm)
library(DESeq2)
library(RColorBrewer)
library(GGally)
library(ggpubr)
library(biomaRt)
library(clusterProfiler)
library(pathview)
library(enrichplot)
library(DOSE)
library(ggnewscale)
library(ggridges)
library(europepmc)
library(GENOVA)
library(ggh4x)
library(SummarizedExperiment)
library(seriation)

# # Prepare output 
output_dir <- "mt20210525_rnaseq_processing"
dir.create(output_dir, showWarnings = FALSE)

Custom functions

ExonLength <- function(genes) {
  # Get the combined exon length for the genes
  tib <- as_tibble(genes) %>%
    filter(type == "exon") %>%
    mutate(gene_id = factor(gene_id, levels = unique(gene_id))) %>%
    group_by(gene_id) %>%
    dplyr::summarise(exon_number = n(),
              exon_length = sum(width))
  
  tib
}

# From Fede:
# ggpairs custom functions
corColor <- function(data, mapping, color = I("black"), sizeRange = c(1, 3), ...) {

  x   <- eval_data_col(data, mapping$x)
  y   <- eval_data_col(data, mapping$y)
  r   <- cor(x, y)
  rt  <- format(r, digits = 3)
  tt  <- as.character(rt)
  cex <- max(sizeRange)

  # helper function to calculate a useable size
  percent_of_range <- function(percent, range) {
    percent * diff(range) + min(range, na.rm = TRUE)
  }

  # plot correlation coefficient
  p <- ggally_text(label = tt, mapping = aes(), xP = 0.5, yP = 0.5,
                   size = I(percent_of_range(cex * abs(r), sizeRange)), color = color, ...) +
    theme(panel.grid.minor=element_blank(),
          panel.grid.major=element_blank())

  corColors <- RColorBrewer::brewer.pal(n = 7, name = "RdYlBu")[2:6]

  if (r <= boundaries[1]) {
    corCol <- corColors[1]
  } else if (r <= boundaries[3]) {
    corCol <- corColors[2]
  } else if (r < boundaries[3]) {
    corCol <- corColors[3]
  } else if (r < boundaries[4]) {
    corCol <- corColors[4]
  } else {
    corCol <- corColors[5]
  }

  p <- p +
    theme(panel.background = element_rect(fill = corCol))

  return(p)
}

plotMAWithLabels <- function(results, main, alpha = 0.05) {
  
  # Plot MA plot with lamin genes highlighted 
  plotMA(results, ylim = c(-4, 4), main = main,
         ylab = "log2 fold change", alpha = alpha)
  points(results[c("ENSG00000165029", "ENSG00000160179"), "baseMean"],
         results[c("ENSG00000165029", "ENSG00000160179"), "log2FoldChange"],
         col = c("red"), cex = 1.5, pch = 19)
  legend("topright", 
         legend = c(paste0("non-sign (n=", sum(results$padj >= alpha, na.rm = T), ")"), 
                    paste0("sign (n=", sum(results$padj < alpha, na.rm = T), ")"), 
                    "ABCA1"), 
         col = c("black", "blue", "red"), pch = 19)
  
}

Prepare data and DESeq2

Read data files and initialize the DESeq2 object.

#######################################
## Prepare metadata

# Prepare metadata sheet
metadata <- read_tsv("ts210514_metadata_PE.tsv") %>%
  filter(grepl("HepG2", sample_name)) %>%
  dplyr::select(-fastq) %>%
  #mutate(sample_id_short = str_remove(sample_id, "_R1_001")) %>%
  separate(sample_name, c("cell", "condition", "replicate"), sep = "_", remove = F) %>%
  mutate(condition = factor(condition, levels = unique(condition)),
         replicate = factor(replicate, levels = unique(replicate)))
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   sample_id = col_character(),
##   fastq = col_character(),
##   sample_name = col_character()
## )
#######################################
## Prepare gene annotation

# Load genes
genes <- import("/home/t.v.schaik/mydata/data/gene_builds/GRCh38/gencode.v24.primary_assembly.annotation.gtf")

# Get gene lengths (total exon length)
gene_length <- ExonLength(genes)

# dplyr::select genes only
genes <- genes[genes$type == "gene"]

# Add seqinfo
# chrom_sizes <- read.table("/DATA/usr/t.v.schaik/data/genomes/GRCh38/hg38.chrom.sizes", sep = "\t")
# row.names(chrom_sizes) <- chrom_sizes[, 1]
# seqlengths(genes) <- chrom_sizes[seqlevels(genes), 2]


#######################################
## Prepare count matrix

# Read count matrix corresponding to the metadata
rnaseq_matrix <- read_tsv("/DATA/scratch/usr/t.v.schaik/proj/3D_nucleus/results/ts210514_RNAseq_Ki67_depletion/results_pe/Star/count_table.tsv") %>%
  dplyr::select(1, matches("HepG2")) %>%
  rename_at(vars(names(.)), ~ c("ensembl_id", metadata$sample_name))
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   sample.id = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
# Filter for strange chromosomes
idx_chromosome <- which(seqnames(genes) %in% c(paste0("chr", 1:22),
                                               "chrX"))

genes <- genes[idx_chromosome]
rnaseq_matrix <- rnaseq_matrix[idx_chromosome, ]

# Filter for protein coding genes (for now)
filter <- c("protein_coding")
idx_filter <- which(genes$gene_type %in% filter)

genes <- genes[idx_filter]
rnaseq_matrix <- rnaseq_matrix[idx_filter, ]

# Filter for genes with no counts
tib_nocounts <- rnaseq_matrix %>%
  mutate(ensembl_id = factor(ensembl_id, levels = ensembl_id)) %>%
  gather(key, value, -ensembl_id) %>%
  group_by(ensembl_id) %>%
  dplyr::summarise(count = sum(value > 2) > 2)
idx_nocounts <- which(tib_nocounts$count)

genes <- genes[idx_nocounts]
rnaseq_matrix <- rnaseq_matrix[idx_nocounts, ]

# Add gene names
ensembl <- useMart("ensembl")
mart <- useDataset("hsapiens_gene_ensembl", mart = ensembl)
rnaseq_matrix <- rnaseq_matrix %>%
  mutate(ensembl_id = gsub(".[0-9]+$", "", ensembl_id))
gene <- rnaseq_matrix$ensembl_id
G_list <- getBM(filters= "ensembl_gene_id", attributes= c("ensembl_gene_id","hgnc_symbol"),values=gene,mart= mart)
rnaseq_matrix <- merge(rnaseq_matrix,G_list,by.x="ensembl_id",by.y="ensembl_gene_id")

1. PCA plot

#######################################
## Prepare count matrix

# Convert into data.frame with row.names for deseq2
rnaseq_counts_2 <- rnaseq_matrix %>%
  filter(hgnc_symbol != "")
rnaseq_counts <- rnaseq_counts_2 %>%
  dplyr::select(-ensembl_id, -hgnc_symbol)
rnaseq_counts <- data.frame(rnaseq_counts, row.names = rnaseq_counts_2$hgnc_symbol)


#######################################
## Initialize deseq2

# Prepare metadata ready for deseq
metadata_df <- data.frame(metadata)
metadata_df[] <- lapply(metadata_df, function(x) gsub("-", "_", x))

# Initialize
rnaseq_dds <- DESeqDataSetFromMatrix(countData = rnaseq_counts,
                                     colData = metadata_df,
                                     design= ~ condition)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# Execute deseq2
rnaseq_dds <- DESeq(rnaseq_dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
#######################################
## PCA analysis - quality control

# Get the "normalized" values and create PCA plot
# Note that "normalized" is simply log2 + 0.01 transformed normalized counts!
rnaseq_dds_norm <- normTransform(rnaseq_dds, pc = 0.01)
plt <- plotPCA(rnaseq_dds_norm, intgroup = c("condition"), ntop = 100)

plt + 
  theme_bw() +
  theme(aspect.ratio = 1)

The PCA plot does not look perfect. But maybe PC2 is representing biological meaning.


2. Correlation plot replicates

A quick initial test: how well do the replicates correlate with each other and between the different experiments?

#######################################
## Make a correlation plot of all samples - quality control

# Get the "normalized" counts for each experiment - this is a different
# normalized from above!
rnaseq_norm <- as_tibble(assay(rnaseq_dds_norm)) %>%
  rename_at(vars(names(.)), ~ metadata$sample_name) %>%
  add_column(hgnc_symbol = rnaseq_counts_2$hgnc_symbol)

# rnaseq_norm <- rnaseq_norm[, order(samples.df$clone)]


# Also, combine the normalized counts
rnaseq_norm_combined <- do.call(cbind,
                                tapply(as.character(metadata$sample_name),
                                       metadata$condition,
                                       function(i) rowMeans(rnaseq_norm[, i])))

tib_norm_combined <- as_tibble(rnaseq_norm_combined, 
                               .name_repair = ~c("DMSO", "FXR", "LXR", "PPARa", "SXR")) %>%
  add_column(gene_id = rnaseq_counts_2$hgnc_symbol)

tib_norm_combined <- as_tibble(genes) %>%
  dplyr::select(seqnames, start, end, strand, gene_id, gene_name) %>%
  inner_join(tib_norm_combined, by = "gene_id")


# Get a sample for plotting dplyr::select points
n <- sample(1:nrow(rnaseq_norm), 1000)

# Use GGally to make correlation plots
boundaries <- seq(from = 0.9, by = 0.01, length.out = 4)
plt <- ggpairs(rnaseq_norm %>% dplyr::select(-hgnc_symbol),
               upper = list(continuous = corColor),
               lower = list(continuous = function(data, mapping, ...) {
                   ggally_points(data = data[n, ], mapping = mapping, alpha = 0.1, size = 0.5) +
                   geom_abline(slope = 1, lty = "dashed", col = "red") +
                   theme_bw()}),
               diag = list(continuous = function(data, mapping, ...) {
                   ggally_densityDiag(data = data, mapping = mapping, alpha = 0.3, fill = "red") +
                   theme_bw()})) +
  ggtitle("Correlation gene expression") +
  xlab("Gene expression") +
  ylab("Gene expression")
  # theme_bw()

print(plt)

The main point of the figure: there is no single replicate misbehaving.


3. Clustering of correlation matrix

Are the replicates clustering together?

norm <- assay(vst(rnaseq_dds, nsub = 10))
dist <- as.dist(1 - cor(norm, method = "spearman"))

clust <- hclust(dist, method = "ward.D2")
clust <- reorder(clust, dist = dist, method = "OLO")

df <- reshape2::melt(cor(norm, method = 'spearman'))

ggplot(df, aes(Var1, Var2, fill = value)) +
  geom_raster() +
  guides(y.sec = "axis", x.sec = guide_axis(position = "bottom")) +
  GENOVA:::scale_fill_GENOVA_div(name = expression("Spearman's "*rho)) +
  scale_x_dendrogram(hclust = clust, name = NULL,
                     guide = guide_dendro(label = FALSE, position = "top")) +
  scale_y_dendrogram(hclust = clust, name = NULL,
                     guide = guide_dendro(label = FALSE)) +
  theme(axis.text.x.bottom = element_text(angle = 90, hjust = 1, vjust = 0.5))

It looks like replicate 3 clusters away from replicate 1 & 2. The stimulations do not really cluster together. If anything, LXR, SXR and PPARa agonists have similar effects and cluster away from FXR and DMSO.


4. Differential analysis

Identifying indivudal genes that change expression upon stimulation.

#######################################
## Differential analysis - test for lfc > 0.5

# Differential analysis
diff_results <- results(rnaseq_dds,
                       contrast = c("condition", "LXR", "DMSO"))



# MA plots with significant hits
ggmaplot(diff_results, main = expression("DMSO" %->% "LXR"),
   fdr = 0.05, fc = 1.5, size = 1,
   palette = c("#B31B21", "#1465AC", "darkgray"),
   genenames = as.vector(diff_results$name),
   legend = "top", top = 100,
   font.label = c("bold", 11),
   font.legend = "bold",
   font.main = "bold",
   ggtheme = ggplot2::theme_bw())
## Warning: ggrepel: 85 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Extract hits from all stimulations
df_changed_LXR <- data.frame("gene" = rownames(diff_results),
                         "basal_exp" = diff_results$baseMean,
                         "lfc" = diff_results$log2FoldChange, 
                         "p_value" = diff_results$padj,
                         "NR" = "LXR")


diff_results_PPARa <- results(rnaseq_dds,
                       contrast = c("condition", "PPARa", "DMSO"))

ggmaplot(diff_results_PPARa, main = expression("DMSO" %->% "PPARa"),
   fdr = 0.05, fc = 1.5, size = 1,
   palette = c("#B31B21", "#1465AC", "darkgray"),
   genenames = as.vector(diff_results$name),
   legend = "top", top = 100,
   font.label = c("bold", 11),
   font.legend = "bold",
   font.main = "bold",
   ggtheme = ggplot2::theme_bw())
## Warning: ggrepel: 92 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

df_changed_PPARa <- data.frame("gene" = rownames(diff_results_PPARa),
                         "basal_exp" = diff_results_PPARa$baseMean,
                         "lfc" = diff_results_PPARa$log2FoldChange, 
                         "p_value" = diff_results_PPARa$padj,
                         "NR" = "PPARa")

diff_results_SXR <- results(rnaseq_dds,
                       contrast = c("condition", "SXR", "DMSO"))

ggmaplot(diff_results_SXR, main = expression("DMSO" %->% "SXR"),
   fdr = 0.05, fc = 1.5, size = 1,
   palette = c("#B31B21", "#1465AC", "darkgray"),
   genenames = as.vector(diff_results$name),
   legend = "top", top = 100,
   font.label = c("bold", 11),
   font.legend = "bold",
   font.main = "bold",
   ggtheme = ggplot2::theme_bw())
## Warning: ggrepel: 92 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

df_changed_SXR <- data.frame("gene" = rownames(diff_results_SXR),
                         "basal_exp" = diff_results_SXR$baseMean,
                         "lfc" = diff_results_SXR$log2FoldChange, 
                         "p_value" = diff_results_SXR$padj,
                         "NR" = "SXR")

diff_results_FXR <- results(rnaseq_dds,
                       contrast = c("condition", "FXR", "DMSO"))

ggmaplot(diff_results_FXR, main = expression("DMSO" %->% "FXR"),
   fdr = 0.05, fc = 1.5, size = 1,
   palette = c("#B31B21", "#1465AC", "darkgray"),
   genenames = as.vector(diff_results$name),
   legend = "top", top = 100,
   font.label = c("bold", 11),
   font.legend = "bold",
   font.main = "bold",
   ggtheme = ggplot2::theme_bw())
## Warning: ggrepel: 46 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

df_changed_FXR <- data.frame("gene" = rownames(diff_results_FXR),
                         "basal_exp" = diff_results_FXR$baseMean,
                         "lfc" = diff_results_FXR$log2FoldChange, 
                         "p_value" = diff_results_FXR$padj,
                         "NR" = "FXR")

df_changed <- rbind(df_changed_LXR, df_changed_PPARa, df_changed_SXR, df_changed_FXR)


# df_changed <- df_changed %>%
#   filter(p_value <= 0.1)

Despite the poor global trends, there seem to be a fraction of genes significantly upregulated (even though FC are not very high). At a first glance, these genes seem to be relevant.


5. Gene ontology of changed set of genes

Can the upregulated genes be assigned to specific groups of genes? This might tell us if we should focus on a specific set of genes.

# SET THE DESIRED ORGANISM HERE
organism = "org.Hs.eg.db"
library(organism, character.only = TRUE)
## Loading required package: AnnotationDbi
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:clusterProfiler':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
# Gene set enrichment analysis for each stimulation
for (i in unique(df_changed$NR)) {
  if (i != "FXR") {
  print(i)
  
  # we want the log2 fold change 
  gene_list <- df_changed$lfc[df_changed$NR == i]
  
  # name the vector
  names(gene_list) <- df_changed$gene[df_changed$NR == i]
  gene_list <- na.omit(gene_list)
  
  # sort the list in decreasing order (required for clusterProfiler)
  gene_list = sort(gene_list, decreasing = TRUE)
  
  # Compute the gene set enrichment
  gse <- gseGO(geneList = gene_list, 
             ont = "MF",
             keyType = "SYMBOL", 
             OrgDb = org.Hs.eg.db)
  
  # Plot the importance of the gene sets
  p<-enrichplot::dotplot(gse, showCategory=10, split=".sign", orderBy = "x") + facet_grid(.~.sign) + ggtitle(paste(i))
  print(p)
  
  # Plot the interaction of genes within sets
  p<-cnetplot(gse, categorySize="pvalue", foldChange=gene_list, showCategory = 3) + ggtitle(paste(i))
  print(p)
  
  # Plot the enrichment distribution
  p<-ridgeplot(gse) + labs(x = "enrichment distribution") + ggtitle(paste(i))
  print(p)
  
  # Distribution of genes in most important gene set
  p<-gseaplot(gse, by = "all", title = paste(gse$Description[1], i), geneSetID = 1)
  print(p)
  
  # How relevant are the identified gene sets?
  terms <- gse$Description[1:3]
  p<-pmcplot(terms, 2010:2020, proportion=FALSE) + theme_bw() + ggtitle(paste(i))
  print(p)
  }
}
## [1] "LXR"
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...

## Picking joint bandwidth of 0.25

## [1] "PPARa"
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...

## Warning: ggrepel: 37 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Picking joint bandwidth of 0.185

## [1] "SXR"
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...

## Warning: ggrepel: 246 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Picking joint bandwidth of 0.134


6. Overlap with nuclear receptor motif

Finally, let’s ask the important question: do the upregulated genes have relevant NR motifs in their promoters/enhancers?


Plot the enrichment of differential genes within LADs.

# #######################################
# ## Create summarizing tibble - only using active genes
# 
# tib <- as_tibble(diff_results) %>%
#   dplyr::select(baseMean, log2FoldChange, padj) %>%
#   add_column(ensembl_id = genes$gene_id,
#              overlaps_LAD = genes$overlaps_LAD,
#              overlaps_NAD = genes$overlaps_NAD) %>%
#   mutate(padj = replace_na(padj, 1))
# 
# tib_gather <- tib %>%
#   dplyr::select(-starts_with("log")) %>%
#   gather(key, value, contains("overlaps")) %>%
#   mutate(sign = padj < 0.05)
# 
# # Group by LAD overlap or significance
# tib_diff <- tib_gather %>%
#   group_by(key, value) %>%
#   dplyr::summarise(fraction = mean(sign),
#             total = sum(sign),
#             background = length(sign))
# 
# #######################################
# ## Plot results
# 
# tib_diff %>%
#   ggplot(aes(x = key, y = fraction, fill = value)) +
#     geom_bar(stat = "identity", position = "dodge", col = "black") +
#     xlab("Knockout cells") +
#     ylab("Fraction differentially expressed") +
#     scale_fill_manual(values = c("grey", "black"), name = "Overlaps LAD") +
#     theme_bw() +
#     theme(aspect.ratio = 1)
# 
# tib_diff %>%
#   ggplot(aes(x = key, y = total, fill = value)) +
#     geom_bar(stat = "identity", position = "dodge", col = "black") +
#     xlab("Knockout cells") +
#     ylab("Total differentially expressed") +
#     scale_fill_manual(values = c("grey", "black"), name = "Overlaps LAD") +
#     theme_bw() +
#     theme(aspect.ratio = 1)
# 
# 
# ```
# 
# Not significant.
# 
# The analysis above is based on differential gene expression and simply asking 
# whether LAD genes are over-represented. However, there is a bias in gene 
# expression levels in LAD genes compared to iLAD genes: they often are more lowly
# expressed. It could that this bias also affects the result. To rule this out, 
# let's create a matched set (Christ's script) and use that.
# 
# ```{r repeat with matched set, fig.width = 5, fig.height = 3.5}
# 
# ## Function by Christ Leemans
# ## Create a matched set
# ##
# ## get a table with matching sets
# ## table = complete table to take matching sets from
# ## class_col = column name of class of interest
# ## class = name of class to match the set on
# ## order_on = column name to order on
# ## bs = bin size to sample equal number of items from
# matchSet <- function(table, class_col, class, order_on, bs=10){
#   # order by value of interest
#   o_vec = order(table[,order_on])
#   o_table = table[o_vec, ]
#   set_A = which(o_table[,class_col]==class)
# 
#   # define bins that cover the range of set A
#   n = length(o_vec)
#   bin_n = floor((n - set_A[1] - 1) / bs)
#   seq_vec = seq(n-bin_n*bs, n, bs)
# 
#   # take a matching set B
#   set_B = c()
#   for(i in 1:(length(seq_vec)-1)){
#     sub_table = o_table[(seq_vec[i] + 1):seq_vec[i + 1], ]
#     sub_A = which(sub_table[,class_col]==class)
#     if (length(sub_A) < bs/2){
#         sub_B = sample(which(sub_table[,class_col]!=class), length(sub_A))
#     } else {
#         sub_B = which(sub_table[,class_col]!=class)
#     }
#     set_B = c(set_B, sub_B + seq_vec[i])
#   }
#   ## can also return o_table[c(setA, setB), ]
#   ## but this way order is perserved.
#   i_vec = o_vec[c(set_A, set_B)]
#   return(table[i_vec[order(i_vec)], ])
# }
# 
# tib_matched <- tibble()
# set.seed(123)
# for (i in 1:50) {
#   tmp <- matchSet(table = as.data.frame(tib), 
#                   class_col = "overlaps_LAD", 
#                   class = T, 
#                   order_on = "baseMean")
#   tmp <- as_tibble(tmp) %>%
#     mutate(match = i)
#   tib_matched <- bind_rows(tib_matched, tmp)
# }
# 
# # tib_matched <- matchSet(table = as.data.frame(tib), 
# #                         class_col = "overlaps_LAD", 
# #                         class = T, 
# #                         order_on = "baseMean")
# # tib_matched <- as_tibble(tib_matched)
# 
# # Check whether this worked
# tib %>% 
#   ggplot(aes(x = overlaps_LAD, y = log2(baseMean+1))) +
#   geom_violin(fill = "grey") +
#   theme_bw() +
#   theme(aspect.ratio = 2)
# 
# tib_matched %>% 
#   ggplot(aes(x = overlaps_LAD, y = log2(baseMean+1))) +
#   geom_violin(fill = "grey") +
#   theme_bw() +
#   theme(aspect.ratio = 2)
# 
# 
# 
# # Repeat enrichment plot
# tib_gather <- tib_matched %>%
#   dplyr::select(-starts_with("log")) %>%
#   gather(key, value, contains("overlaps")) %>%
#   mutate(sign = padj < 0.05)
# 
# # Group by LAD overlap or significance
# tib_diff <- tib_gather %>%
#   group_by(key, value) %>%
#   dplyr::summarise(fraction = mean(sign),
#             total = sum(sign),
#             background = length(sign))
# 
# #######################################
# ## Plot results
# 
# tib_diff %>%
#   ggplot(aes(x = key, y = fraction, fill = value)) +
#     geom_bar(stat = "identity", position = "dodge", col = "black") +
#     xlab("Knockout cells") +
#     ylab("Fraction differentially expressed") +
#     scale_fill_manual(values = c("grey", "black"), name = "Overlaps LAD") +
#     theme_bw() +
#     theme(aspect.ratio = 1)
# 
# tib_diff %>%
#   ggplot(aes(x = key, y = total, fill = value)) +
#     geom_bar(stat = "identity", position = "dodge", col = "black") +
#     xlab("Knockout cells") +
#     ylab("Total differentially expressed") +
#     scale_fill_manual(values = c("grey", "black"), name = "Overlaps LAD") +
#     theme_bw() +
#     theme(aspect.ratio = 1)

Conclusion

Effects are mild, but make sense.


SessionInfo

sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] org.Hs.eg.db_3.12.0         AnnotationDbi_1.52.0       
##  [3] seriation_1.2-9             ggh4x_0.1.2.1.9            
##  [5] GENOVA_1.0.0                europepmc_0.4              
##  [7] ggridges_0.5.3              ggnewscale_0.4.5           
##  [9] DOSE_3.16.0                 enrichplot_1.10.2          
## [11] pathview_1.30.1             clusterProfiler_3.18.1     
## [13] biomaRt_2.46.3              ggpubr_0.4.0               
## [15] GGally_2.1.1                RColorBrewer_1.1-2         
## [17] DESeq2_1.30.1               SummarizedExperiment_1.20.0
## [19] Biobase_2.50.0              MatrixGenerics_1.2.1       
## [21] matrixStats_0.58.0          ggbeeswarm_0.6.0           
## [23] rtracklayer_1.50.0          GenomicRanges_1.42.0       
## [25] GenomeInfoDb_1.26.7         IRanges_2.24.1             
## [27] S4Vectors_0.28.1            BiocGenerics_0.36.1        
## [29] forcats_0.5.1               stringr_1.4.0              
## [31] dplyr_1.0.5                 purrr_0.3.4                
## [33] readr_1.4.0                 tidyr_1.1.3                
## [35] tibble_3.1.1                ggplot2_3.3.3              
## [37] tidyverse_1.3.1            
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.1               tidyselect_1.1.1         RSQLite_2.2.7           
##   [4] TSP_1.1-10               grid_4.0.5               BiocParallel_1.24.1     
##   [7] scatterpie_0.1.6         munsell_0.5.0            codetools_0.2-18        
##  [10] withr_2.4.2              colorspace_2.0-0         GOSemSim_2.16.1         
##  [13] highr_0.9                knitr_1.33               rstudioapi_0.13         
##  [16] ggsignif_0.6.1           labeling_0.4.2           KEGGgraph_1.50.0        
##  [19] urltools_1.7.3           GenomeInfoDbData_1.2.4   polyclip_1.10-0         
##  [22] bit64_4.0.5              farver_2.1.0             downloader_0.4          
##  [25] vctrs_0.3.8              generics_0.1.0           xfun_0.22               
##  [28] BiocFileCache_1.14.0     R6_2.5.0                 graphlayouts_0.7.1      
##  [31] locfit_1.5-9.4           bitops_1.0-7             cachem_1.0.4            
##  [34] reshape_0.8.8            fgsea_1.16.0             DelayedArray_0.16.3     
##  [37] assertthat_0.2.1         scales_1.1.1             ggraph_2.0.5            
##  [40] beeswarm_0.3.1           gtable_0.3.0             tidygraph_1.2.0         
##  [43] rlang_0.4.10             genefilter_1.72.1        splines_4.0.5           
##  [46] rstatix_0.7.0            broom_0.7.6              BiocManager_1.30.12     
##  [49] yaml_2.2.1               reshape2_1.4.4           abind_1.4-5             
##  [52] modelr_0.1.8             backports_1.2.1          qvalue_2.22.0           
##  [55] tools_4.0.5              ellipsis_0.3.2           jquerylib_0.1.4         
##  [58] ggdendro_0.1.22          Rcpp_1.0.6               plyr_1.8.6              
##  [61] progress_1.2.2           zlibbioc_1.36.0          RCurl_1.98-1.3          
##  [64] prettyunits_1.1.1        openssl_1.4.3            viridis_0.6.0           
##  [67] cowplot_1.1.1            haven_2.4.1              ggrepel_0.9.1           
##  [70] fs_1.5.0                 magrittr_2.0.1           data.table_1.14.0       
##  [73] DO.db_2.9                openxlsx_4.2.3           triebeard_0.3.0         
##  [76] reprex_2.0.0             hms_1.0.0                evaluate_0.14           
##  [79] xtable_1.8-4             XML_3.99-0.6             rio_0.5.26              
##  [82] readxl_1.3.1             gridExtra_2.3            compiler_4.0.5          
##  [85] crayon_1.4.1             shadowtext_0.0.8         htmltools_0.5.1.1       
##  [88] geneplotter_1.68.0       lubridate_1.7.10         DBI_1.1.1               
##  [91] tweenr_1.0.2             dbplyr_2.1.1             MASS_7.3-53.1           
##  [94] rappdirs_0.3.3           Matrix_1.3-2             car_3.0-10              
##  [97] cli_2.5.0                igraph_1.2.6             pkgconfig_2.0.3         
## [100] registry_0.5-1           rvcheck_0.1.8            GenomicAlignments_1.26.0
## [103] foreign_0.8-81           foreach_1.5.1            xml2_1.3.2              
## [106] annotate_1.68.0          vipor_0.4.5              bslib_0.2.4             
## [109] XVector_0.30.0           rvest_1.0.0              digest_0.6.27           
## [112] graph_1.68.0             Biostrings_2.58.0        rmarkdown_2.7           
## [115] cellranger_1.1.0         fastmatch_1.1-0          curl_4.3                
## [118] Rsamtools_2.6.0          lifecycle_1.0.0          jsonlite_1.7.2          
## [121] carData_3.0-4            viridisLite_0.4.0        askpass_1.1             
## [124] fansi_0.4.2              pillar_1.6.0             lattice_0.20-41         
## [127] KEGGREST_1.30.1          fastmap_1.1.0            httr_1.4.2              
## [130] survival_3.2-10          GO.db_3.12.1             glue_1.4.2              
## [133] zip_2.1.1                iterators_1.0.13         png_0.1-7               
## [136] bit_4.0.4                Rgraphviz_2.34.0         ggforce_0.3.3           
## [139] stringi_1.5.3            sass_0.3.1               blob_1.2.1              
## [142] memoise_2.0.0